# This script runs the distance-weighted regressions required to create Figure S4 (population weights).
# It stores the point estimates, standard errors etc.

library(tidyverse)
library(lfe)
library(ggplot2)
load("combined_dta_long_483.rdata")
dta$id = as.factor(dta$id)

pop_dta = read.csv("ori9_population.csv")

dta = dta %>%
  left_join(pop_dta, by=c("agency" = "ori9"))

dta = dta %>%
  select(-race_single, -race_multi, -...1, -asian, -indig, -white, -platformId)
dta = dta %>%
  mutate(stats_interactions = stats_likeCount+stats_angryCount+stats_loveCount+stats_wowCount+stats_hahaCount+stats_sadCount)

dta = dta %>%
  select(-stats_angryCount, -stats_loveCount, -stats_wowCount, -stats_hahaCount, -stats_sadCount)

crime_classes = unique(dta$crime_class)
crimes = unique(dta$crime)
q = length(crimes)
p = length(crime_classes)
reg_types = c('w_d_agpop')

exp_weight = function(x, decay=0.00005, threshold = 483){
  y = exp(-decay*(x^2))
  y[x>threshold] = 0
  return(y)
}


dta = dta %>%
  mutate(
    w = exp_weight(d)
  ) %>%
  filter(year > 2009 & year < 2020)

results_all_dist = data.frame(type = NA, weights = NA, agency = NA, estimate = NA, cse = NA, se = NA, N = NA, r2 = NA, adjr2 = NA)


for(i in c(1:q)){
  results_new = data.frame(type = NA, weights = NA, agency = NA, estimate = NA, cse = NA, se = NA, N = NA, r2 = NA, adjr2 = NA)
  ct = crimes[i]
  cat(ct, "\n")
  
  new_dta = dta %>%
    filter(crime == ct) %>%
    group_by(agency, type) %>%
    mutate(
      w_d_ag = ifelse(!is.na(w), w / sum(w),
                      1 / n()),
      b = (black > 0)*1
    ) %>%
    mutate(
      w_d_agpop = w_d_ag*pop_ave
    )
  
  for(k in c(1:length(reg_types))){
    reg_type = reg_types[k]
    
    the_reg = felm(b ~ type | agency | 0 | agency + id, data=new_dta[!is.na(new_dta[,reg_type]),]
                   , w=data.frame(new_dta[!is.na(new_dta[,reg_type]),])[,reg_type]
    )
    
    results_new[k,] = c(ct, str_split(reg_type, "_")[[1]][2], grepl('_ag', reg_type)*1, the_reg$beta[1], the_reg$cse[1], the_reg$se[1], the_reg$N, summary(the_reg)$r.squared, summary(the_reg)$adj.r.squared)
  }
  results_all_dist = bind_rows(results_all_dist, results_new)
}



for(i in c(1:p)){
  results_new = data.frame(type = NA, weights = NA, agency = NA, estimate = NA, cse = NA, se = NA, N = NA, r2 = NA, adjr2 = NA)
  ct = crime_classes[i]
  cat(ct, "\n")
  
  new_dta = dta %>%
    filter(crime_class == ct) %>%
    group_by(agency, type) %>%
    mutate(
      w_d_ag = ifelse(!is.na(w), w / sum(w),
                      1 / n()),
      b = (black > 0)*1
    ) %>%
    mutate(
      w_d_agpop = w_d_ag*pop_ave
    )
  
  for(k in c(1:length(reg_types))){
    reg_type = reg_types[k]
    
    the_reg = felm(b ~ type | agency | 0 | agency + id, data=new_dta[!is.na(new_dta[,reg_type]),]
                   , w=data.frame(new_dta[!is.na(new_dta[,reg_type]),])[,reg_type]
    )
    
    results_new[k,] = c(ct, str_split(reg_type, "_")[[1]][2], grepl('_ag', reg_type)*1, the_reg$beta[1], the_reg$cse[1], the_reg$se[1], the_reg$N, summary(the_reg)$r.squared, summary(the_reg)$adj.r.squared)
  }
  results_all_dist = bind_rows(results_all_dist, results_new)
}

results_new = data.frame(type = NA, weights = NA, agency = NA, estimate = NA, cse = NA, se = NA, N = NA, r2 = NA, adjr2 = NA)
cat("all", "\n")

new_dta = dta %>%
  group_by(agency, type) %>%
  mutate(
    w_d_ag = ifelse(!is.na(w), w / sum(w),
                    1 / n()),
    b = (black > 0)*1
  ) %>%
  mutate(
    w_d_agpop = w_d_ag*pop_ave
  )

for(k in c(1:length(reg_types))){
  reg_type = reg_types[k]
  the_reg = felm(b ~ type | agency | 0 | agency + id, data=new_dta[!is.na(new_dta[,reg_type]),]
                 , w=data.frame(new_dta[!is.na(new_dta[,reg_type]),])[,reg_type]
  )
  results_new[k,] = c("all", str_split(reg_type, "_")[[1]][2], grepl('_ag', reg_type)*1, the_reg$beta[1], the_reg$cse[1], the_reg$se[1], the_reg$N, summary(the_reg)$r.squared, summary(the_reg)$adj.r.squared)
}
results_all_dist = bind_rows(results_all_dist, results_new)

results_all_dist = results_all_dist[-1,]
results_all_dist$estimate = as.numeric(results_all_dist$estimate)
results_all_dist$cse = as.numeric(results_all_dist$cse)
results_all_dist$se = as.numeric(results_all_dist$se)
results_all_dist$N = as.numeric(results_all_dist$N)
results_all_dist$r2 = as.numeric(results_all_dist$r2)
results_all_dist$adjr2 = as.numeric(results_all_dist$adjr2)
write_csv(results_all_dist, 'all_results_pop_weights_distance_00005.csv')

